#!/usr/bin/env python
# -*- coding: utf-8 -*-
__author__ = 'moxuan'

import scrapy
from tutorial.items import BlogItem


class SegmentfaultSpider(scrapy.Spider):
    """Spider that collects blog titles and links from a SegmentFault listing.

    Crawling starts from the single URL in ``start_urls``; every matched
    summary entry on that page becomes one ``BlogItem``.
    """

    name = "segmentfault"
    # Scrapy expects a list of domain strings here. A bare string would be
    # iterated character by character when the offsite filter is built.
    allowed_domains = ["segmentfault.com"]
    start_urls = (
        'http://segmentfault.com/blogs/newest?page=3',
    )

    def parse(self, response):
        """Extract one ``BlogItem`` per blog summary found in *response*.

        Args:
            response: The downloaded listing page; must provide ``xpath()``.

        Returns:
            A list of ``BlogItem`` objects with ``title`` and ``url`` set.
            Summary blocks that lack a title or a link are skipped.
        """
        items = []
        for blog in response.xpath('//div[@class="summary"]'):
            # Both lookups are relative to the current summary <div>.
            titles = blog.xpath('h2[@class="title"]/a/text()').extract()
            urls = blog.xpath('h2[@class="title"]/a/@href').extract()
            if not titles or not urls:
                # Skip malformed entries rather than letting an IndexError
                # discard every other item on the page.
                continue

            item = BlogItem()
            item['title'] = titles[0]
            item['url'] = urls[0]
            items.append(item)
        return items
